*WHAT'S ACROSS THE BORDER?
*JHA, NEUMARK, AND RODRIGUEZ-LOPEZ

/*
	This do file creates the master data to compare 129 commuting zones that span two states. It contains
	employment, number of establishments, payroll, and minimum wages from 1990 to 2016.
	I use the max_mw from VZ_state_annual.dta. 	
*/


include directories_build.do

* Load data at the czone-state-industry level
use "`datadir'`s'cbp_czone_state_20ind_sample.dta", clear

* Merge ranking data
merge m:1 sic87xx using "`cbpdir'`s'cbp_sic87xx_wagerank.dta"  
drop _merge

* Keep only two-state czones
keep if multi_two==1

* Use tsset to be able to easily work with two parts of each czone (czone part 0 and czone part 1)
set type double
gen czone_sic87xx=czone*10000+sic87xx
format %15.0g czone_sic87xx
gen czone_sic87xx_year=czone_sic87xx*10000+year
format %15.0g czone_sic87xx_year
sort czone_sic87xx_year state

tsset czone_sic87xx_year state

gen stcz=0
replace stcz=1 if L.state<state | L2.state<state | L3.state<state | L4.state<state | L5.state<state | L6.state<state | L7.state<state | L8.state<state | L9.state<state | L10.state<state | L11.state<state | L12.state<state | L13.state<state | L14.state<state | L15.state<state | L16.state<state | L17.state<state | L18.state<state | L19.state<state | L20.state<state | L21.state<state | L22.state<state | L23.state<state | L24.state<state | L25.state<state | L26.state<state | L27.state<state | L28.state<state | L29.state<state | L30.state<state | L31.state<state | L32.state<state | L33.state<state | L34.state<state | L35.state<state | L36.state<state | L37.state<state | L38.state<state | L39.state<state | L40.state<state | L41.state<state | L42.state<state | L43.state<state | L44.state<state | L45.state<state | L46.state<state | L47.state<state | L48.state<state | L49.state<state | L50.state<state | L51.state<state | L52.state<state | L53.state<state | L54.state<state | L55.state<state | L56.state<state | L57.state<state | L58.state<state | L59.state<state | L60.state<state
order year sic87xx czone state stcz
tsset czone_sic87xx_year stcz


*USE THE MAXIMUM MINIMUM WAGE
gen mw=max_mw

foreach b in "czone" "multi" "state" "emp" "ap" "qp1" "est" "mw" "totpop" "workagepop" "totemp" "totap" "totqp1" "totest" "wageap" "wageqp"  "totwageap" "totwageqp" "totwageapm" "totwageqpm" {
gen `b'_0=L.`b'
gen `b'_1=`b'
}


keep if stcz==1

drop czone multi state emp ap qp1 est mw totpop workagepop totemp totap totqp1 totest wageap wageqp totwageap totwageqp totwageapm totwageqpm 

label var sic87xx "20 industries"
label var sic87xx_desc "20 industries description"

foreach t in "0" "1" {
label var state_`t' "State FIPS code `t'"
label var est_`t' "Number of establishments `t'"
label var emp_`t' "Employment `t'"
label var qp1_`t' "Nominal first quarter payroll `t'"
label var ap_`t' "Nominal annual payroll `t'"
label var mw_`t' "Minimum wage `t'"
label var totest_`t' "Number of establishments (czone-state)"
label var totemp_`t' "Employment (czone-state)"
label var totqp1_`t' "Nominal first quarter payroll (czone-state)"
label var totap_`t' "Nominal annual payroll (czone-state)"
label var wageap_`t' "Nominal hourly wage using AP (czone-state-ind)"
label var wageqp_`t' "Nominal hourly wage using QP (czone-state-ind)"
label var totwageap_`t' "Nominal hourly wage using AP (czone-state)"
label var totwageqp_`t' "Nominal hourly wage using QP (czone-state)"
label var totwageapm_`t' "Nominal hourly wage using AP MINUS (czone-state)"
label var totwageqpm_`t' "Nominal hourly wage using QP MINUS (czone-state)"
label var totpop_`t' "Total population czone-state"
label var workagepop_`t' "Total working age population czone-state"

} 


label var wagerank90 "Wage ranking of industry in 1990"

drop stcz statename stateabb min_fed_mw min_mw mean_fed_mw mean_mw max_fed_mw max_mw state_min state_max

preserve

*** THREE-STATE 1
* Load data at the czone-state-industry level
use "`datadir'`s'cbp_czone_state_20ind_sample.dta", clear

* Merge ranking data
merge m:1 sic87xx using "`cbpdir'`s'cbp_sic87xx_wagerank.dta"  
drop _merge

* Keep only multi-state czones
keep if three==1


gen id=0

*In three-state czones, drop the smallest (in terms of employment) czone
replace id=1 if czone==11304 & state==11
replace id=1 if czone==17100 & state==21
replace id=1 if czone==17400 & state==24
replace id=1 if czone==23600 & state==17
replace id=1 if czone==28001 & state==19
replace id=1 if czone==29901 & state==20
replace id=1 if czone==30300 & state==5
replace id=1 if czone==31006 & state==20

replace id=2 if czone==11304 & state==24
replace id=2 if czone==17100 & state==39
replace id=2 if czone==17400 & state==42
replace id=2 if czone==23600 & state==19
replace id=2 if czone==28001 & state==31
replace id=2 if czone==29901 & state==29
replace id=2 if czone==30300 & state==29
replace id=2 if czone==31006 & state==40

replace id=3 if czone==11304 & state==51
replace id=3 if czone==17100 & state==54
replace id=3 if czone==17400 & state==54
replace id=3 if czone==23600 & state==29
replace id=3 if czone==28001 & state==46
replace id=3 if czone==29901 & state==40
replace id=3 if czone==30300 & state==40
replace id=3 if czone==31006 & state==48

*THIS IS WHAT CHANGES
drop if id==1

* Use tsset to be able to easily work with two parts of each czone (czone part 0 and czone part 1)
set type double
gen czone_sic87xx=czone*10000+sic87xx
format %15.0g czone_sic87xx
gen czone_sic87xx_year=czone_sic87xx*10000+year
format %15.0g czone_sic87xx_year
sort czone_sic87xx_year state

tsset czone_sic87xx_year state

gen stcz=0
replace stcz=1 if L.state<state | L2.state<state | L3.state<state | L4.state<state | L5.state<state | L6.state<state | L7.state<state | L8.state<state | L9.state<state | L10.state<state | L11.state<state | L12.state<state | L13.state<state | L14.state<state | L15.state<state | L16.state<state | L17.state<state | L18.state<state | L19.state<state | L20.state<state | L21.state<state | L22.state<state | L23.state<state | L24.state<state | L25.state<state | L26.state<state | L27.state<state | L28.state<state | L29.state<state | L30.state<state | L31.state<state | L32.state<state | L33.state<state | L34.state<state | L35.state<state | L36.state<state | L37.state<state | L38.state<state | L39.state<state | L40.state<state | L41.state<state | L42.state<state | L43.state<state | L44.state<state | L45.state<state | L46.state<state | L47.state<state | L48.state<state | L49.state<state | L50.state<state | L51.state<state | L52.state<state | L53.state<state | L54.state<state | L55.state<state | L56.state<state | L57.state<state | L58.state<state | L59.state<state | L60.state<state
order year sic87xx czone state stcz
tsset czone_sic87xx_year stcz


*USE THE MAXIMUM MINIMUM WAGE
gen mw=max_mw

foreach b in "czone" "multi" "state" "emp" "ap" "qp1" "est" "mw" "totpop" "workagepop" "totemp" "totap" "totqp1" "totest" "wageap" "wageqp" "totwageap" "totwageqp" "totwageapm" "totwageqpm" {
gen `b'_0=L.`b'
gen `b'_1=`b'
}

keep if stcz==1

drop czone multi state emp ap qp1 est mw totpop workagepop totemp totap totqp1 totest wageap wageqp totwageap totwageqp totwageapm totwageqpm


label var sic87xx "20 industries"
label var sic87xx_desc "20 industries description"
*label var czone "AADHP 129 two-state commuting zones"

foreach t in "0" "1" {
label var state_`t' "State FIPS code `t'"
label var est_`t' "Number of establishments `t'"
label var emp_`t' "Employment `t'"
label var qp1_`t' "Nominal first quarter payroll `t'"
label var ap_`t' "Nominal annual payroll `t'"
label var mw_`t' "Minimum wage `t'"
label var totest_`t' "Number of establishments (czone-state)"
label var totemp_`t' "Employment (czone-state)"
label var totqp1_`t' "Nominal first quarter payroll (czone-state)"
label var totap_`t' "Nominal annual payroll (czone-state)"
label var wageap_`t' "Nominal hourly wage using AP (czone-state-ind)"
label var wageqp_`t' "Nominal hourly wage using QP (czone-state-ind)"
label var totwageap_`t' "Nominal hourly wage using AP (czone-state)"
label var totwageqp_`t' "Nominal hourly wage using QP (czone-state)"
label var totwageapm_`t' "Nominal hourly wage using AP MINUS (czone-state)"
label var totwageqpm_`t' "Nominal hourly wage using QP MINUS (czone-state)"
label var totpop_`t' "Total population czone-state"
label var workagepop_`t' "Total working age population czone-state"

} 


label var wagerank90 "Wage ranking of industry in 1990"

drop stcz statename stateabb min_fed_mw min_mw mean_fed_mw mean_mw max_fed_mw max_mw state_min state_max

tempfile a1
save `a1', replace

restore

**********************************************************************
preserve

*** THREE-STATE 2

* Load data at the czone-state-industry level
use "`datadir'`s'cbp_czone_state_20ind_sample.dta", clear

* Merge ranking data
merge m:1 sic87xx using "`cbpdir'`s'cbp_sic87xx_wagerank.dta"  
drop _merge

* Keep only multi-state czones
keep if three==1


gen id=0

*In three-state czones, drop the smallest (in terms of employment) czone
replace id=1 if czone==11304 & state==11
replace id=1 if czone==17100 & state==21
replace id=1 if czone==17400 & state==24
replace id=1 if czone==23600 & state==17
replace id=1 if czone==28001 & state==19
replace id=1 if czone==29901 & state==20
replace id=1 if czone==30300 & state==5
replace id=1 if czone==31006 & state==20

replace id=2 if czone==11304 & state==24
replace id=2 if czone==17100 & state==39
replace id=2 if czone==17400 & state==42
replace id=2 if czone==23600 & state==19
replace id=2 if czone==28001 & state==31
replace id=2 if czone==29901 & state==29
replace id=2 if czone==30300 & state==29
replace id=2 if czone==31006 & state==40

replace id=3 if czone==11304 & state==51
replace id=3 if czone==17100 & state==54
replace id=3 if czone==17400 & state==54
replace id=3 if czone==23600 & state==29
replace id=3 if czone==28001 & state==46
replace id=3 if czone==29901 & state==40
replace id=3 if czone==30300 & state==40
replace id=3 if czone==31006 & state==48

drop if id==2



* Use tsset to be able to easily work with two parts of each czone (czone part 0 and czone part 1)
set type double
gen czone_sic87xx=czone*10000+sic87xx
format %15.0g czone_sic87xx
gen czone_sic87xx_year=czone_sic87xx*10000+year
format %15.0g czone_sic87xx_year
sort czone_sic87xx_year state

tsset czone_sic87xx_year state

gen stcz=0
replace stcz=1 if L.state<state | L2.state<state | L3.state<state | L4.state<state | L5.state<state | L6.state<state | L7.state<state | L8.state<state | L9.state<state | L10.state<state | L11.state<state | L12.state<state | L13.state<state | L14.state<state | L15.state<state | L16.state<state | L17.state<state | L18.state<state | L19.state<state | L20.state<state | L21.state<state | L22.state<state | L23.state<state | L24.state<state | L25.state<state | L26.state<state | L27.state<state | L28.state<state | L29.state<state | L30.state<state | L31.state<state | L32.state<state | L33.state<state | L34.state<state | L35.state<state | L36.state<state | L37.state<state | L38.state<state | L39.state<state | L40.state<state | L41.state<state | L42.state<state | L43.state<state | L44.state<state | L45.state<state | L46.state<state | L47.state<state | L48.state<state | L49.state<state | L50.state<state | L51.state<state | L52.state<state | L53.state<state | L54.state<state | L55.state<state | L56.state<state | L57.state<state | L58.state<state | L59.state<state | L60.state<state
order year sic87xx czone state stcz
tsset czone_sic87xx_year stcz


*USE THE MAXIMUM MINIMUM WAGE
gen mw=max_mw

foreach b in "czone" "multi" "state" "emp" "ap" "qp1" "est" "mw" "totpop" "workagepop" "totemp" "totap" "totqp1" "totest" "wageap" "wageqp" "totwageap" "totwageqp" "totwageapm" "totwageqpm" {
gen `b'_0=L.`b'
gen `b'_1=`b'
}

keep if stcz==1

drop czone multi state emp ap qp1 est mw totpop workagepop totemp totap totqp1 totest wageap wageqp totwageap totwageqp totwageapm totwageqpm


label var sic87xx "20 industries"
label var sic87xx_desc "20 industries description"
*label var czone "AADHP 129 two-state commuting zones"

foreach t in "0" "1" {
label var state_`t' "State FIPS code `t'"
label var est_`t' "Number of establishments `t'"
label var emp_`t' "Employment `t'"
label var qp1_`t' "Nominal first quarter payroll `t'"
label var ap_`t' "Nominal annual payroll `t'"
label var mw_`t' "Minimum wage `t'"
label var totest_`t' "Number of establishments (czone-state)"
label var totemp_`t' "Employment (czone-state)"
label var totqp1_`t' "Nominal first quarter payroll (czone-state)"
label var totap_`t' "Nominal annual payroll (czone-state)"
label var wageap_`t' "Nominal hourly wage using AP (czone-state-ind)"
label var wageqp_`t' "Nominal hourly wage using QP (czone-state-ind)"
label var totwageap_`t' "Nominal hourly wage using AP (czone-state)"
label var totwageqp_`t' "Nominal hourly wage using QP (czone-state)"
label var totwageapm_`t' "Nominal hourly wage using AP MINUS (czone-state)"
label var totwageqpm_`t' "Nominal hourly wage using QP MINUS (czone-state)"
label var totpop_`t' "Total population czone-state"
label var workagepop_`t' "Total working age population czone-state"

} 


label var wagerank90 "Wage ranking of industry in 1990"

drop stcz statename stateabb min_fed_mw min_mw mean_fed_mw mean_mw max_fed_mw max_mw state_min state_max 


*ONLY FOR SAMPLE 2

replace id=1

tempfile a2
save `a2', replace

restore

**********************************************************************
preserve

*** THREE-STATE 3

* Load data at the czone-state-industry level
use "`datadir'`s'cbp_czone_state_20ind_sample.dta", clear

* Merge ranking data
merge m:1 sic87xx using "`cbpdir'`s'cbp_sic87xx_wagerank.dta"  
drop _merge

* Keep only multi-state czones
keep if three==1

gen id=0

*In three-state czones, drop the smallest (in terms of employment) czone
replace id=1 if czone==11304 & state==11
replace id=1 if czone==17100 & state==21
replace id=1 if czone==17400 & state==24
replace id=1 if czone==23600 & state==17
replace id=1 if czone==28001 & state==19
replace id=1 if czone==29901 & state==20
replace id=1 if czone==30300 & state==5
replace id=1 if czone==31006 & state==20

replace id=2 if czone==11304 & state==24
replace id=2 if czone==17100 & state==39
replace id=2 if czone==17400 & state==42
replace id=2 if czone==23600 & state==19
replace id=2 if czone==28001 & state==31
replace id=2 if czone==29901 & state==29
replace id=2 if czone==30300 & state==29
replace id=2 if czone==31006 & state==40

replace id=3 if czone==11304 & state==51
replace id=3 if czone==17100 & state==54
replace id=3 if czone==17400 & state==54
replace id=3 if czone==23600 & state==29
replace id=3 if czone==28001 & state==46
replace id=3 if czone==29901 & state==40
replace id=3 if czone==30300 & state==40
replace id=3 if czone==31006 & state==48

drop if id==3


* Use tsset to be able to easily work with two parts of each czone (czone part 0 and czone part 1)
set type double
gen czone_sic87xx=czone*10000+sic87xx
format %15.0g czone_sic87xx
gen czone_sic87xx_year=czone_sic87xx*10000+year
format %15.0g czone_sic87xx_year
sort czone_sic87xx_year state

tsset czone_sic87xx_year state

gen stcz=0
replace stcz=1 if L.state<state | L2.state<state | L3.state<state | L4.state<state | L5.state<state | L6.state<state | L7.state<state | L8.state<state | L9.state<state | L10.state<state | L11.state<state | L12.state<state | L13.state<state | L14.state<state | L15.state<state | L16.state<state | L17.state<state | L18.state<state | L19.state<state | L20.state<state | L21.state<state | L22.state<state | L23.state<state | L24.state<state | L25.state<state | L26.state<state | L27.state<state | L28.state<state | L29.state<state | L30.state<state | L31.state<state | L32.state<state | L33.state<state | L34.state<state | L35.state<state | L36.state<state | L37.state<state | L38.state<state | L39.state<state | L40.state<state | L41.state<state | L42.state<state | L43.state<state | L44.state<state | L45.state<state | L46.state<state | L47.state<state | L48.state<state | L49.state<state | L50.state<state | L51.state<state | L52.state<state | L53.state<state | L54.state<state | L55.state<state | L56.state<state | L57.state<state | L58.state<state | L59.state<state | L60.state<state
order year sic87xx czone state stcz
tsset czone_sic87xx_year stcz


*USE THE MAXIMUM MINIMUM WAGE
gen mw=max_mw

foreach b in "czone" "multi" "state" "emp" "ap" "qp1" "est" "mw" "totpop" "workagepop" "totemp" "totap" "totqp1" "totest" "wageap" "wageqp" "totwageap" "totwageqp" "totwageapm" "totwageqpm" {
gen `b'_0=L.`b'
gen `b'_1=`b'
}

keep if stcz==1

drop czone multi state emp ap qp1 est mw totpop workagepop totemp totap totqp1 totest wageap wageqp totwageap totwageqp totwageapm totwageqpm

label var sic87xx "20 industries"
label var sic87xx_desc "20 industries description"
*label var czone "AADHP 129 two-state commuting zones"

foreach t in "0" "1" {
label var state_`t' "State FIPS code `t'"
label var est_`t' "Number of establishments `t'"
label var emp_`t' "Employment `t'"
label var qp1_`t' "Nominal first quarter payroll `t'"
label var ap_`t' "Nominal annual payroll `t'"
label var mw_`t' "Minimum wage `t'"
label var totest_`t' "Number of establishments (czone-state)"
label var totemp_`t' "Employment (czone-state)"
label var totqp1_`t' "Nominal first quarter payroll (czone-state)"
label var totap_`t' "Nominal annual payroll (czone-state)"
label var wageap_`t' "Nominal hourly wage using AP (czone-state-ind)"
label var wageqp_`t' "Nominal hourly wage using QP (czone-state-ind)"
label var totwageap_`t' "Nominal hourly wage using AP (czone-state)"
label var totwageqp_`t' "Nominal hourly wage using QP (czone-state)"
label var totwageapm_`t' "Nominal hourly wage using AP MINUS (czone-state)"
label var totwageqpm_`t' "Nominal hourly wage using QP MINUS (czone-state)"
label var totpop_`t' "Total population czone-state"
label var workagepop_`t' "Total working age population czone-state"
} 


label var wagerank90 "Wage ranking of industry in 1990"

drop stcz statename stateabb min_fed_mw min_mw mean_fed_mw mean_mw max_fed_mw max_mw state_min state_max 

tempfile a3
save `a3', replace

restore


append using "`a1'"
append using "`a2'"
append using "`a3'"

*DROP DC PAIRS
drop if state_0==11

replace id=0 if id==.

save "`datadir'`s'cbp_czonestatepairs_20ind_sample.dta", replace

*RESTAURANTS SAMPLE

*Keep only restaurant industry
keep if wagerank90==1

*Drop pairs within the same state
drop if state_0==state_1
gen check1=log(mw_1)-log(mw_0)
gen check2=log(emp_1)-log(emp_0)
drop if check1==. | check2==.
drop check1 check2

gen statea=state_0
gen stateb=state_1


*****MERGE DLR PAIR IDS SO THAT WE CAN USE PAIRTIMES==2 TO IDENTIFY DLR PAIRS
*FOR CZONE PAIRS, CZONE_0=CZONE_1
gen czone=czone_0 
merge m:1 czone state_0 state_1 using "`dlrdir'`s'cz_pairs_dlr.dta"
drop if _merge==2
drop _merge
drop pair_id
***********************************************************************


*czoneid gives you the number of pairs
egen czonestatepair=group(czone id)
drop czone

reshape long czone_ state_  est_ emp_ qp1_ ap_ mw_ totest_ totemp_ totqp1_ totap_ wageap_ wageqp_ totwageap_ totwageqp_ totwageapm_ totwageqpm_ totpop_ workagepop_, i(year czonestatepair) j(st)
drop st
foreach d in "czone" "state"  "est" "emp" "qp1" "ap" "mw" "totest" "totemp" "totqp1" "totap" "wageap" "wageqp" "totwageap" "totwageqp" "totwageapm" "totwageqpm" "totpop" "workagepop" {
ren `d'_ `d'
}

*Entity variable (commuting zone-state)
gen czonestate=czone*100+state

save "`datadir'`s'cbp_stacked_czonestatepair_sample.dta", replace
clear
